* Run everything below under the Stata 18.0 interpreter rules.
version 18.0

* Pin the number of cores so results do not depend on how many the host offers.
set processors 8

* Fix the random-number seed.
set seed 20220613

* Clean slate: clear what is held in memory, then drop all macros.
clear all
macro drop _all

*-------------------------------------------------------
* project: daylight saving time (dst)
*-------------------------------------------------------

* Provenance locals for this do-file.
*   pgm  : base name of this do-file; also used to name the log file and the
*          saved data set further down
*   who  : author
*   dte  : creation date (fixed text)
*   dte2 : date of the current run, read from c(current_date)
*   tag  : one-line banner built from the four locals above and displayed
*          right after the log is opened
local pgm  "dst-data01_acs_educ_attainment_clean"         // file name
local who  "Muzhe Yang"                                   // author
local dte  "2022-06-13"                                   // created date
local dte2 "`c(current_date)'"                            // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

* Close any log left open by an earlier run; -capture- swallows the error
* Stata raises when no log is open.
capture log close

* Open a plain-text log named after this do-file, overwriting any previous one.
* The path is relative to the current working directory.
* Forward slashes are used as the directory separator: Stata accepts them on
* Windows, macOS and Linux, whereas backslashes only work on Windows, and a
* backslash placed directly before a macro reference suppresses its expansion.
log using "code/management/`pgm'.txt", replace text

* Write the provenance banner as the first entry of the log.
display "`tag'"

* Load the ACS educational-attainment worksheet, replacing the data in memory.
* -firstrow- takes variable names from the first row of the sheet.
* Paths are relative to the current working directory and use forward slashes
* so the script runs unchanged on Windows, macOS and Linux.
import excel "data_raw/acs/acs_educ_attainment.xlsx", ///
    sheet("acs_educ_attainment") firstrow clear

* Copy the four source columns into double-precision variables with short names.
* NOTE(review): this assumes the B15002_calc_* columns were imported as numeric;
* -generate double- stops with a type mismatch if any of them came in as string.
gen double educ_hs = B15002_calc_numHSE
gen double educ_hs_pct = B15002_calc_pctHSE
gen double educ_coll = B15002_calc_numGEBAE
gen double educ_coll_pct = B15002_calc_pctGEBAE

label variable educ_hs       "pop. 25 yrs and over: highest educ completed is high school"
label variable educ_hs_pct   "% of pop. 25 yrs and over: highest educ completed is high school"
label variable educ_coll     "pop. 25 yrs and over: highest educ completed is bachelor's degree or higher"
label variable educ_coll_pct "% of pop. 25 yrs and over: highest educ completed is bachelor's degree or higher"

* Put the new variables right after County, then keep only the identifiers
* and the four education measures.
* NOTE(review): if GEOID was imported as numeric, any leading zeros in the
* codes are gone at this point; verify its storage type in the codebook output.
order educ_hs educ_coll educ_hs_pct educ_coll_pct, after(County)
keep GEOID State County ///
     educ_hs educ_coll educ_hs_pct educ_coll_pct

* Print a compact summary of the kept variables to the log.
codebook, compact

* Shrink storage types where this loses no information, then save the cleaned
* data set under the name of this do-file, overwriting any earlier copy.
compress
save "data_clean/acs/`pgm'.dta", replace

* Close the log opened earlier in this do-file.
log close
* Stop executing this do-file here; anything placed below this line is not run.
exit